Covered
<
<
The dotted line also marks the significance level, alpha = 0.05.
# Dot plot of pine needle lengths.
# Every observation is drawn at the same dummy x position (x = 0), so the
# measured variable, needle length, is on the y-axis. The axis labels below
# follow that mapping: the dummy x-axis is left unlabeled.
ggplot(pine_data, aes(x = 0, y = length_mm)) +
  geom_point(
    size = 2,
    alpha = 0.5,
    # presumably meant to spread overlapping points sideways; confirm visually
    position = position_dodge2(width = 0.15)
  ) +
  # geom_jitter(width = 0.1, height = .05, size = 2, alpha = 0.5) +
  labs(
    title = "Pine Needle Length Distribution",
    x = "",
    y = "Length (mm)"
  ) +
  # Narrow x range keeps the single strip of points centered in the panel
  scale_x_continuous(limits = c(-0.5, 0.5)) +
  theme_minimal()
<
# Log-transform the lake trout mass data and compare the distribution of the
# original and transformed values with histograms and QQ plots.
lt_df <- read_csv("data/lake_trout.csv")

# Add log10(mass + 1); the +1 keeps a mass of 0 finite (log10(0) is -Inf)
lt_df <- lt_df %>%
  mutate(log_mass = log10(mass_g + 1))

# Create before and after plots to show the transformation effect.
# NOTE(review): geom_density() draws on the density scale by default while the
# histogram y-axis is a count, so the curve may look flat -- confirm whether
# it should be rescaled or dropped.
lt_hist_1_plot <- ggplot(lt_df, aes(x = mass_g)) +
  geom_histogram(bins = 10, fill = "lightblue", color = "black") +
  geom_density(alpha = 0.5) +
  labs(title = "Original Data", x = "Mass (g)", y = "Count") +
  theme_minimal()

lt_qq_1_plot <- ggplot(lt_df, aes(sample = mass_g)) +
  geom_qq() +
  geom_qq_line() +
  labs(
    title = "QQ Plot - Original",
    x = "Theoretical Quantiles",
    y = "Sample Quantiles"
  ) +
  theme_minimal()

lt_hist_2_log_plot <- ggplot(lt_df, aes(x = log_mass)) +
  geom_histogram(bins = 10, fill = "lightgreen", color = "black") +
  geom_density(alpha = 0.5) +
  labs(title = "Log-Transformed Data", x = "log10(Mass + 1)", y = "Count") +
  theme_minimal()

lt_qq_2_log_plot <- ggplot(lt_df, aes(sample = log_mass)) +
  geom_qq() +
  geom_qq_line() +
  labs(
    title = "QQ Plot - Log-Transformed",
    x = "Theoretical Quantiles",
    y = "Sample Quantiles"
  ) +
  theme_minimal()

# Combine plots: original data on the top row, log-transformed on the bottom
(lt_hist_1_plot + lt_qq_1_plot) / (lt_hist_2_log_plot + lt_qq_2_log_plot) +
  plot_annotation(
    title = "Lake Trout Mass and Log(Mass+1)"
  )
<
Common assumptions for tests:
What can we do if our data violates these assumptions?
Alternatives
In this activity, we’ve:
Key takeaways:
Things that stood out
What does not make sense or what questions do you have…
What makes you nervous?